*12345678901234567890123456789012345678901234567890123456789012345678901234567890
capture log close
clear all
set more off

*	************************************************************************
* 	File-Name: 	ReplicationIndicatorsCoding.do
*	Log-file:	na
*	Date:  		09/20/2016
*	Author: 	MA, CYC, JU, KG, AJ
*	Data Used:  RawDataHH.dta
*	Output		ReplicationIndicatorsData.dta
*	Purpose:   	.do file to create the ReplicationIndicatorsData.dta dataset
*	************************************************************************

*	************************************************************************
* 	0. Setting up the data
*	************************************************************************

*	Load the raw household data. The path is relative, so Stata's current
*	working directory must be the folder that contains RawDataHH.dta.
clear all
use "./RawDataHH.dta", clear



*	************************************************************************
* 	1. Recoding
*	************************************************************************

*	************************************************************************
* 	A. Main DV and IV
*	************************************************************************

* The satisfaction items are shifted from a 1-3 scale to a 0-2 scale and
* share one value label.
* A value of 0 on the original scale is treated as a coding error and set to
* missing before the shift.
* NOTE(review): only m2_q77_electrified_satisfaction is cleaned of zeros; a 0
* in m3_q85_light_main_satisfy would become -1 below -- confirm none exist.
replace m2_q77_electrified_satisfaction = . if m2_q77_electrified_satisfaction == 0

label define rescale 0 "Unsatisfied" 1 "Neutral" 2 "Satisfied"

foreach satvar of varlist m2_q77_electrified_satisfaction m3_q85_light_main_satisfy {
	* 1/2/3 -> 0/1/2, then attach the shared value label
	replace `satvar' = `satvar' - 1
	label values `satvar' rescale
}


* rank_energy: 1 if LPG, electricity or kerosene is ranked 1 in the m6_q137
* support ranking, 0 if water or education is ranked 1, missing otherwise.
* The energy items are assigned last, so they win if several items carry
* rank 1 for the same household.
gen rank_energy = 0 if m6_q137_support_rank_water == 1 | ///
	m6_q137_support_rank_edu == 1
replace rank_energy = 1 if m6_q137_support_rank_lpg == 1 | ///
	m6_q137_support_rank_elec == 1 | ///
	m6_q137_support_rank_kero == 1
label variable rank_energy "Energy Highest Priority ($=$1)"

* rank_elec: 1 if electricity is ranked 1 in the m6_q137 support ranking,
* 0 for any other recorded rank, missing when the rank is system-missing (.).
gen rank_elec = (m6_q137_support_rank_elec == 1) if m6_q137_support_rank_elec != .
label variable rank_elec "Electricity Highest Priority ($=$1)"

* rank_hh: 1 if electricity is ranked 1 in m3_q94_1_rank_household_elec,
* 0 for any other recorded rank, missing when that rank is missing.
* The non-missing guard must test the same variable the dummy is built from.
* It previously tested m6_q137_support_rank_elec, which coded households with
* a missing m3_q94_1 rank as 0 and left households with a valid m3_q94_1 rank
* but a missing m6_q137 rank uncoded.
gen rank_hh = .
replace rank_hh = 1 if m3_q94_1_rank_household_elec == 1
replace rank_hh = 0 if m3_q94_1_rank_household_elec != 1 & m3_q94_1_rank_household_elec != .
label variable rank_hh "Electricity Highest Priority ($=$1)"


* Standardized versions (egen's std() function) of the two raw electricity
* rank variables, complementing the rank == 1 dummies built from them.
egen rankstd = std(m6_q137_support_rank_elec)
egen rankaltstd = std(m3_q94_1_rank_household_elec)


* agricultured: 1 if the income source code is 1, 2 or 3, 0 for any other
* recorded code, missing when the income source is system-missing (.).
* NOTE(review): codes 1-3 are presumably the agricultural categories (the
* variable is later labelled "Farmer") -- confirm against the codebook.
gen agricultured = inlist(m1_q31_income_source, 1, 2, 3) if m1_q31_income_source != .


*	************************************************************************
* 	B. Indicators
*	************************************************************************



*	************************************************************************
* 	1. Capacity
*	Peak power which could be drawn from the electricity connection. 
*	The capacity tiers, though measured in terms of watt rating are designed 
*	to correspond to a set of energy services for each tier. The watt ratings 
*	are determined by aggregating the typical connected load of electrical 
*	appliances used to deliver the specific energy services.
*	************************************************************************


*	Sources for the per-appliance wattages assumed below:
* All: https://www.duke-energy.com/pdfs/Appliance_OpCost_List_Duke_v8.06.pdf
* LED: http://www.designrecycleinc.com/led%20comp%20chart.html
* Cooler, tube, grinder: http://www.andhranews.net/others/wattage.asp
*	Appliance counts listed here but not part of the sum below:
*			m2_q75_12_no_elec_stove
*			m2_q75_13_no_inverter
*			m2_q75_14_no_elec_pump

*	Assumed load (watts) of one unit of each appliance
local w_incandescent 100
local w_cfl 20
local w_led 7
local w_tube 40
local w_fan 60
local w_iron 1100
local w_fridge 300
local w_tv 100
local w_radio 15
local w_cooler 80
local w_washer 665
local w_phone 2.6
local w_computer 65

*	Connected load = sum over appliances of (count x wattage). The result is
*	missing as soon as any one of the eleven counts is missing.
gen capacity = m2_q75_1_no_Incandescent_bulb * `w_incandescent' + ///
	m2_q75_2_no_cfl_bulb * `w_cfl' + ///
	m2_q75_3_no_led_light * `w_led' + ///
	m2_q75_5_no_fans * `w_fan' + ///
	m2_q75_6_no_irons * `w_iron' + ///
	m2_q75_7_no_fridges * `w_fridge' + ///
	m2_q75_8_no_tvs * `w_tv' + ///
	m2_q75_9_no_radios * `w_radio' + ///
	m2_q75_11_no_wash_machine * `w_washer' + ///
	m2_q75_10_no_cooler * `w_cooler' + ///
	m2_q75_4_no_tube_light * `w_tube'

*	Free-text "other" appliance. The grinder uses the midpoint of 750 W and 370 W.
replace capacity = capacity + (750+370)/2 if m2_q75_15_elec_other == "GRINDER"
replace capacity = capacity + `w_phone' if m2_q75_15_elec_other == "MOBILE CHARGING"
replace capacity = capacity + `w_computer' if m2_q75_15_elec_other == "COMPUTER"

*	Households with no appliance count recorded at all get a capacity of 0.
*	NOTE(review): this test covers nine counts only; m2_q75_10_no_cooler and
*	m2_q75_4_no_tube_light enter the sum above but are not checked here --
*	confirm whether that is intended.
local none_reported 1
foreach cnt of varlist m2_q75_1_no_Incandescent_bulb ///
		m2_q75_2_no_cfl_bulb ///
		m2_q75_3_no_led_light ///
		m2_q75_5_no_fans ///
		m2_q75_6_no_irons ///
		m2_q75_7_no_fridges ///
		m2_q75_8_no_tvs ///
		m2_q75_9_no_radios ///
		m2_q75_11_no_wash_machine {
	local none_reported `none_reported' & `cnt' == .
}
replace capacity = 0 if `none_reported'

*	Capacity is undefined for households without electricity
replace capacity = . if m2_q68_elec == 0



*	************************************************************************
* 	2. Duration
*	Number of hours for which electricity is typically available in a day.
*	************************************************************************

* Hours of supply per day and per night. A system-missing answer is counted
* as 0 hours; households without electricity (m2_q68_elec == 0) are then set
* to missing.
gen duration = cond(m2_q69_elec_hrs == ., 0, m2_q69_elec_hrs)
replace duration = . if m2_q68_elec == 0

gen durationNight = cond(m2_q70_elec_night_hrs == ., 0, m2_q70_elec_night_hrs)
replace durationNight = . if m2_q68_elec == 0


*	************************************************************************
* 	3. Reliability
*	GTF defines reliability as unscheduled vs. scheduled outages. 
*	A generalization that all residential outages in India are unscheduled, 
*	especially in rural areas, is certainly a valid one. Though it can be 
*	argued that unscheduled outages should be the aspirational level, 
*	poor reliability needs to be captured alternatively for India. This is 
*	contingent upon the field data and based on the responses sought in the 
*	survey, it is proposed to use number of days in a month with no power as 
*	a metric for reliability. Given the reality of rural electrification status, 
*	it would be a good metric that can aid in comparison and progress, 
*	instead of every household being marked at the lowest tier because it 
*	witnesses unscheduled outages. 
*	************************************************************************

* Reliability = 30 minus the reported number of outage days
* (m2_q71_elec_out_days); left missing for households without electricity.
gen reliability = 30 - m2_q71_elec_out_days if m2_q68_elec != 0

*	************************************************************************
* 	4. Quality
*	Electricity supply has a few attributes that could be used to assess 
*	its quality levels, depending upon the context and end-use. 
*	For the purpose of household access, the proposed indicator is the 
*	number of instances of voltage fluctuations (drops and surges) affecting 
*	the use of appliances or damaging them. It can be evaluated on the basis of 
*	the number of days with high-voltage (NH) and low-voltage (NL) instances.
*	************************************************************************

* Quality = 30 minus the reported number of low-voltage days
* (m2_q73_elec_equi_low_days); left missing for households without
* electricity. Only the low-voltage count enters this measure.
gen quality = 30 - m2_q73_elec_equi_low_days if m2_q68_elec != 0


*	************************************************************************
* 	5. Affordability. Only used for comparison with NSS.
*	See Jain, Agrawal, & Ganesan (2014) 
*	************************************************************************

* Monthly electricity spending: row total of grid, microgrid and solar
* installment spending. The "missing" option keeps the total missing when
* all three components are missing.
egen monthly_elec_spending = rowtotal(m2_q55_3_grid_spending ///
	m2_q58_2_micro_spending ///
	m2_q60_8_solar_own_installment), missing

* Annualize the monthly figures; income is approximated as expenditures
* plus yearly savings.
gen yearly_elec_spending = monthly_elec_spending * 12
gen yearly_expenditures = m1_q32_month_expenditure * 12
gen yearly_income = m1_q33_year_save + yearly_expenditures

* Share of total expenditures spent on electricity. Left missing for
* households without electricity; shares above 1 are discarded.
gen expenditure = yearly_elec_spending / yearly_expenditures if m2_q68_elec != 0
replace expenditure = . if expenditure > 1 & expenditure != .



*	************************************************************************
* 	C. Others
*	************************************************************************

*	Socio-economic factor
* Step 1: principal-component factor analysis on the variables listed in
* ses_vars (education, literacy, savings, expenditure, bank account, land,
* livestock and household assets).
* Original note: the first factor explains about 23.38% ("Proportion") of
* total variance; additional factors don't add much information.

* To change the factor, edit the list below. Every line except the last
* must end with "///".
local ses_vars ///
	m1_q23_edu ///
	m1_q22_rw_lang ///
	m1_q33_year_save ///
	m1_q32_month_expenditure ///
	m1_q34_bank_acc ///
	m1_q36_land_final ///
	m1_q37_no_cows ///
	m1_q37_no_buffalos ///
	m1_q37_no_chickens ///
	m1_q37_no_cow_calves ///
	m1_q37_no_buffalo_calves ///
	m1_q37_no_goats ///
	m1_q37_no_other_animals ///
	m1_q43_no_rooms ///
	m1_q44_no_beds ///
	m1_q45_no_tables ///
	m1_q46_no_chairs ///
	m1_q47_no_bikes ///
	m1_q48_no_motos ///
	m1_q49_no_cookers

factor `ses_vars', pcf

* Step 2: rotate the retained factors
rotate

* Step 3: store the score of the first factor as factor1.
* To store all factors, use "predict factor*" instead. Factors are ordered
* by strength (factor1 explains more than factor2, and so on).
predict factor1

*	IDs and fixed effects: numeric state id plus state and district dummies
encode m1_q8_state, generate(id)
quietly tabulate m1_q8_state, generate(stated)
quietly tabulate m1_q9_district, generate(districtd)

*	Education category dummies
quietly tabulate m1_q23_edu, generate(educd)

*	Standardized indicators: std<name> for each indicator built above
foreach ind in duration reliability quality capacity expenditure durationNight {
	egen std`ind' = std(`ind')
}

*	Religion and caste dummies
quietly tabulate m1_q24_religion, generate(religiond)
quietly tabulate m1_q25_caste, generate(casted)

*	Second numeric state id (same encoding as id, kept under its own name)
encode m1_q8_state, generate(stateid)

*	One dummy per distinct value of duration
*	NOTE(review): the "0" label assumes the smallest observed duration is 0.
quietly tabulate duration, generate(durationd)
label variable durationd1 "0"

*	Interaction of standardized duration and standardized reliability
gen stddur_x_reliability = stdduration * stdreliability

*	************************************************************************
* 	2. Final touch
*	************************************************************************

*	Respondent and household characteristics
label variable m1_q19_age "Age"
label variable m1_q20_gender "Male"
label variable m1_q23_edu "Education"
label variable m1_q27_no_adults "\# Adults in HH"
label variable m1_q29_no_children "\# Children"
label variable m1_q32_month_expenditure "Expenditure/Month"
label variable m1_q33_year_save "Savings/Year"
label variable m1_q34_bank_acc "Bank Account"
label variable agricultured "Farmer"
label variable m2_q68_elec "Electricity ($=$1)"

*	Access indicators, raw ...
label variable duration "Duration"
label variable reliability "Reliability"
label variable quality "Voltage Stability"
label variable capacity "Capacity"
label variable expenditure "Expenditure Share"

*	... and standardized
label variable stdduration "Duration (std)"
label variable stddurationNight "Duration (Night) (std)"
label variable stdreliability "Reliability (std)"
label variable stdquality "Voltage Stability (std)"
label variable stdcapacity "Capacity (std)"
label variable stdexpenditure "Expenditure Share (std)"
label variable stddur_x_reliability "Duration*Reliability (std)"

*	Dummy sets created by -tabulate, generate()-. Labels are assigned by
*	position, so each list must follow the order in which the categories
*	were numbered.
*	NOTE(review): the label-to-dummy mapping is taken as given -- confirm it
*	against a tabulation of each source variable.
local k 0
foreach txt in "No Schooling" "Up to 5th" "Up to 10th" "12th Stand./Diploma" "Graduate" {
	local ++k
	label variable educd`k' "`txt'"
}

local k 0
foreach txt in "Bihar" "Jharkhand" "MP" "Odisha" "UP" "West Bengal" {
	local ++k
	label variable stated`k' "`txt'"
}

local k 0
foreach txt in "Hindu" "Muslim" "Other Religion" {
	local ++k
	label variable religiond`k' "`txt'"
}

local k 0
foreach txt in "Scheduled Caste" "Scheduled Tribe" "Other Backward" "General Caste" {
	local ++k
	label variable casted`k' "`txt'"
}

*	Write the analysis dataset in Stata 12 format, overwriting any old copy
saveold "ReplicationIndicatorsData.dta", replace version(12)



